import re

import requests

# HTTP request headers passed to requests.get() by top_250().
# 'User-Agent' presents the client as a desktop Chrome browser; 'Cookie' is a
# cookie string assembled from adjacent string literals.
# NOTE(review): the cookie looks like a captured logged-in browser session
# (e.g. the dbcl2 / ck entries) -- if so it is a hard-coded credential that
# will expire and should not live in source control; consider loading it from
# the environment or a config file. Confirm before changing.
# NOTE(review): the last fragment ends with '. ' (period + space), which looks
# like an accidental trailing character rather than part of the cookie value
# -- verify against the original browser cookie.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/90.0.4430.212 Safari/537.36',
    'Cookie': 'bid=RJSPRK1rFbY; push_noty_num=0; push_doumail_num=0; dbcl2="226248034:/LZTT2nFj7A"; ct=y; '
              '__utmv=30149280.22624; '
              '_vwo_uuid_v2=DA406FF64603A72C3E921E35574304627|b75b5b79a8b38752f96ee3915bd3b5f1; '
              '__utmz=223695111.1621084205.3.3.utmcsr=douban.com|utmccn=(referral)|utmcmd=referral|utmcct=/; '
              'ap_v=0,6.0; __utmz=30149280.1621157885.9.4.utmcsr=cn.bing.com|utmccn=('
              'referral)|utmcmd=referral|utmcct=/; ck=5eiX; _pk_ref.100001.4cf6=["","",1621161708,'
              '"https://www.douban.com/"]; _pk_ses.100001.4cf6=*; '
              '__utma=30149280.1866918799.1592112879.1621157885.1621161709.10; '
              '__utmb=30149280.0.10.1621161709; __utmc=30149280; '
              '__utma=223695111.588405269.1620652637.1621084205.1621161709.4; '
              '__utmb=223695111.0.10.1621161709; __utmc=223695111; '
              '_pk_id.100001.4cf6=ab4cca199d791460.1620652637.4.1621161716.1621086046. '
}


# Matches one movie entry on a Top 250 page: the first "title" span that
# follows an item div, and the "rating_num" span after it. re.S lets '.'
# cross the newlines between tags. Compiled once instead of once per page.
_MOVIE_PATTERN = re.compile(
    r'<li>.*?<div class="item">.*?<span class="title">'
    r'(?P<title>.*?)</span>.*?<span class="rating_num" property="v:average">'
    r'(?P<score>.*?)</span>', re.S)


def _parse_movies(html):
    """Return a list of ``[title, score]`` string pairs found in one page of HTML."""
    return [[m.group('title'), m.group('score')]
            for m in _MOVIE_PATTERN.finditer(html)]


def top_250(timeout=10):
    """
    Scrape the Douban Movie Top 250 chart.

    Requests the ten listing pages (``start`` offsets 0, 25, ..., 225) using
    the module-level ``headers`` and extracts each entry's title and score
    with a regular expression.

    :param timeout: seconds to wait for each HTTP request before giving up;
        passed straight to ``requests.get``. Without a timeout a stalled
        connection would block forever.
    :return: movie_list, a list of ``[title, score]`` lists in page order;
        both elements are strings exactly as they appear in the HTML.
    :raises requests.RequestException: on connection errors, timeouts, or a
        4xx/5xx response (raised instead of silently returning a partial or
        empty list when the site rejects the request).
    """
    movie_list = []
    for page in range(10):
        url = 'https://movie.douban.com/top250?start=%s&filter=' % (page * 25)
        # The context manager closes the response even if parsing raises.
        with requests.get(url=url, headers=headers, timeout=timeout) as r:
            # Fail loudly on error pages rather than regex-matching nothing.
            r.raise_for_status()
            movie_list.extend(_parse_movies(r.text))
    return movie_list


if __name__ == '__main__':
    # Script entry point: scrape the chart and print one entry per line.
    for entry in top_250():
        print(entry)
